import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import plotly.express as px
import plotly.io as pio
# Load the two CSV inputs: the track points (latitude / longitude / elevation)
# and the per-kilometre speed table, then preview the track.
hike_df, speed_df = (
    pd.read_csv(csv_path) for csv_path in ('hike_data.csv', 'speed_data.csv')
)
hike_df.head()
| | Latitude | Longitude | Elevation |
|---|---|---|---|
| 0 | 51.20257 | -115.59737 | 1691.0 |
| 1 | 51.20259 | -115.59734 | 1691.0 |
| 2 | 51.20262 | -115.59732 | 1692.0 |
| 3 | 51.20265 | -115.59730 | 1692.0 |
| 4 | 51.20268 | -115.59729 | 1693.0 |
speed_df.head()
| | kilometers | avg_speed | elevation_gain |
|---|---|---|---|
| 0 | 1 | 4.82 | 37 m |
| 1 | 2 | 5.10 | 58 m |
| 2 | 3 | 4.89 | 18 m |
| 3 | 4 | 4.75 | 103 m |
| 4 | 5 | 4.06 | 157 m |
hike_df.dtypes
Latitude float64 Longitude float64 Elevation float64 dtype: object
speed_df.dtypes
kilometers int64 avg_speed float64 elevation_gain object dtype: object
# Drop the trailing " m" unit and convert the column to float.
# ``str.rstrip(' m')`` strips any run of the characters ' ' and 'm', not the
# literal suffix, so the suffix is matched explicitly here. ``astype(str)``
# first keeps this cell safe to re-run after the column is already numeric
# (the ``.str`` accessor raises on a float column).
speed_df['elevation_gain'] = (
    speed_df['elevation_gain']
    .astype(str)
    .str.replace(r'\s*m$', '', regex=True)
    .astype(float)
)
speed_df.head()
| | kilometers | avg_speed | elevation_gain |
|---|---|---|---|
| 0 | 1 | 4.82 | 37.0 |
| 1 | 2 | 5.10 | 58.0 |
| 2 | 3 | 4.89 | 18.0 |
| 3 | 4 | 4.75 | 103.0 |
| 4 | 5 | 4.06 | 157.0 |
speed_df.dtypes
kilometers int64 avg_speed float64 elevation_gain float64 dtype: object
hike_df.isna().sum()
Latitude 0 Longitude 0 Elevation 0 dtype: int64
speed_df.isna().sum()
kilometers 0 avg_speed 0 elevation_gain 0 dtype: int64
hike_df.describe()
| | Latitude | Longitude | Elevation |
|---|---|---|---|
| count | 5023.000000 | 5023.000000 | 5023.000000 |
| mean | 51.224317 | -115.598374 | 1959.351185 |
| std | 0.008697 | 0.012297 | 362.045170 |
| min | 51.202570 | -115.612320 | 1566.000000 |
| 25% | 51.219150 | -115.608430 | 1643.000000 |
| 50% | 51.226040 | -115.602320 | 1813.000000 |
| 75% | 51.231445 | -115.590180 | 2264.000000 |
| max | 51.236140 | -115.569570 | 2737.000000 |
speed_df.describe()
| | kilometers | avg_speed | elevation_gain |
|---|---|---|---|
| count | 19.000000 | 19.000000 | 19.000000 |
| mean | 10.000000 | 4.650000 | 79.157895 |
| std | 5.627314 | 2.040319 | 77.053562 |
| min | 1.000000 | 1.480000 | 0.000000 |
| 25% | 5.500000 | 3.280000 | 2.000000 |
| 50% | 10.000000 | 4.820000 | 58.000000 |
| 75% | 14.500000 | 5.165000 | 153.500000 |
| max | 19.000000 | 9.050000 | 208.000000 |
# Spread the total route length (km) evenly over the gaps between consecutive
# track rows. NOTE(review): this treats the points as equally spaced along the
# route and relies on the default 0..n-1 integer index — confirm both hold.
distance = 19.05
num_row = hike_df.shape[0] - 1  # number of gaps between rows, not rows
distance_per_row = distance / num_row
# Row i is i steps from the start, so the first row is 0 and the last row
# lands on the full route length.
hike_df['Distance'] = distance_per_row * hike_df.index
hike_df
| | Latitude | Longitude | Elevation | Distance |
|---|---|---|---|---|
| 0 | 51.20257 | -115.59737 | 1691.0 | 0.000000 |
| 1 | 51.20259 | -115.59734 | 1691.0 | 0.003793 |
| 2 | 51.20262 | -115.59732 | 1692.0 | 0.007587 |
| 3 | 51.20265 | -115.59730 | 1692.0 | 0.011380 |
| 4 | 51.20268 | -115.59729 | 1693.0 | 0.015173 |
| ... | ... | ... | ... | ... |
| 5018 | 51.20312 | -115.59774 | 1689.0 | 19.034827 |
| 5019 | 51.20309 | -115.59772 | 1689.0 | 19.038620 |
| 5020 | 51.20307 | -115.59768 | 1690.0 | 19.042413 |
| 5021 | 51.20304 | -115.59765 | 1690.0 | 19.046207 |
| 5022 | 51.20299 | -115.59759 | 1691.0 | 19.050000 |
5023 rows × 4 columns
# Locate the highest point of the track. The maximum is read from the data
# rather than hard-coded (it was 2737) so the lookup keeps working if the
# input CSV changes.
check = hike_df[hike_df['Elevation'] == hike_df['Elevation'].max()]
check
| | Latitude | Longitude | Elevation | Distance |
|---|---|---|---|---|
| 2733 | 51.22513 | -115.56963 | 2737.0 | 10.367115 |
# Elevation profile: elevation plotted against distance along the route,
# saved to PNG and then displayed.
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(hike_df['Distance'], hike_df['Elevation'])
ax.set(
    title="Cascade Mountain (2998 meters): Hiking Elevation Gain",
    xlabel="Distance (km)",
    ylabel="Elevation (meters)",
)
fig.savefig("LineChart-Cascade.png")
plt.show()
# Plan view of the track: longitude / latitude scatter with each point
# coloured by its elevation, saved to PNG and then displayed.
fig, ax = plt.subplots(1, 1, figsize=(8, 6))
scatter = ax.scatter(
    hike_df['Longitude'],
    hike_df['Latitude'],
    c=hike_df['Elevation'],
    cmap='plasma',
)
plt.colorbar(scatter, label='Elevation (meters)')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
# Title typo fixed: "Latitdue" -> "Latitude".
ax.set_title("Longitude, Latitude Scatterplot w/ Elevation Colorbar")
plt.savefig("2dScatterplot-Cascade1.png")
plt.show()
# Interactive map of the track drawn on OpenStreetMap tiles.
fig = px.scatter_mapbox(
    hike_df,
    lat='Latitude',
    lon='Longitude',
    width=850,
    zoom=12,
    mapbox_style='open-street-map',
)
# `fig` is a Plotly figure, so matplotlib's plt.savefig() would only write an
# empty matplotlib canvas ("Figure ... with 0 Axes"). Export through Plotly
# instead; HTML keeps the map interactive and needs no extra image-export
# dependency.
pio.write_html(fig, "2dScatter-Cascade.html")
fig.show()
<Figure size 640x480 with 0 Axes>
# 3D view of the track (longitude, latitude, elevation) coloured by
# elevation, with a fixed 1000x800 canvas; written to a standalone HTML file
# and then displayed.
fig = px.scatter_3d(
    hike_df,
    x='Longitude',
    y='Latitude',
    z='Elevation',
    color='Elevation',
).update_layout(
    title='Latitude, Longitude, and Elevation: Cascade Mountain',
    autosize=False,
    width=1000,
    height=800,
)
fig.write_html("3dScatterplot-Cascade.html")
fig.show()
# Number of track rows that make up one kilometre, rounded to the nearest
# whole row so it can serve as an integer step when slicing the track.
rows_per_km = np.rint(num_row / distance).astype(int)
rows_per_km
264
def calculate_elevation_change(row1, row2, df):
    """Return the elevation difference between two rows of *df*.

    Args:
        row1: Index label of the starting row.
        row2: Index label of the ending row.
        df: DataFrame with an ``Elevation`` column.

    Returns:
        ``Elevation`` at ``row2`` minus ``Elevation`` at ``row1``; positive
        when the value rises from ``row1`` to ``row2``.

    Raises:
        KeyError: If either label or the ``Elevation`` column is missing.
    """
    return df.at[row2, 'Elevation'] - df.at[row1, 'Elevation']
# Sample the track at every `rows_per_km` rows (roughly one kilometre) and
# record the net elevation change across each stretch.
intervals = range(0, num_row, rows_per_km)
# Pair each boundary row with the next one: (0, k), (k, 2k), ...
elevation_change = [
    calculate_elevation_change(start_row, end_row, hike_df)
    for start_row, end_row in zip(intervals, intervals[1:])
]
# One value per stretch; pandas raises here if the count does not match the
# number of rows in speed_df.
speed_df['elevation_change'] = elevation_change
speed_df
| | kilometers | avg_speed | elevation_gain | elevation_change |
|---|---|---|---|---|
| 0 | 1 | 4.82 | 37.0 | -50.0 |
| 1 | 2 | 5.10 | 58.0 | -10.0 |
| 2 | 3 | 4.89 | 18.0 | -64.0 |
| 3 | 4 | 4.75 | 103.0 | 99.0 |
| 4 | 5 | 4.06 | 157.0 | 138.0 |
| 5 | 6 | 3.45 | 150.0 | 162.0 |
| 6 | 7 | 3.69 | 163.0 | 157.0 |
| 7 | 8 | 2.27 | 189.0 | 182.0 |
| 8 | 9 | 2.01 | 208.0 | 200.0 |
| 9 | 10 | 1.48 | 202.0 | 168.0 |
| 10 | 11 | 2.81 | 3.0 | -79.0 |
| 11 | 12 | 3.11 | 0.0 | -208.0 |
| 12 | 13 | 6.11 | 0.0 | -259.0 |
| 13 | 14 | 7.51 | 0.0 | -219.0 |
| 14 | 15 | 9.05 | 1.0 | -170.0 |
| 15 | 16 | 8.31 | 0.0 | -156.0 |
| 16 | 17 | 4.86 | 84.0 | 49.0 |
| 17 | 18 | 5.23 | 42.0 | 14.0 |
| 18 | 19 | 4.84 | 89.0 | 44.0 |
# Twin-axis bar chart: net elevation change (left axis, orange) and average
# speed (right axis) for each kilometre, saved to PNG and then displayed.
bar_width = 0.2
positions = range(len(speed_df['kilometers']))
elev_change = speed_df['elevation_change']
elev_low, elev_high = min(elev_change), max(elev_change)

fig, ax = plt.subplots(figsize=(10, 8))

# Elevation bars are centred one bar-width left of each tick; the speed bars
# (on the twin axis) are centred on the tick itself.
ax.bar(
    [tick - bar_width for tick in positions],
    elev_change,
    width=bar_width,
    label='Elevation Change (m)',
    color='orange',
)
ax2 = ax.twinx()
ax2.bar(
    positions,
    speed_df['avg_speed'],
    width=bar_width,
    label='Average Speed (km/h)',
)

# Both y-ranges are built from the same elevation-change extremes (scaled by
# 1.2 and 0.1), so zero falls at the same height on each axis — presumably to
# line up the baselines of the two bar series.
ax.set_ylabel('Elevation Change per KM (meters)')
ax.set_ylim(elev_low * 1.2, elev_high * 1.2)
ax2.set_ylabel('Average Speed per KM (km/h)')
ax2.set_ylim(elev_low * 0.1, elev_high * 0.1)

ax.set_xticks(positions)
ax.set_xticklabels(speed_df['kilometers'].astype(str))
ax.set_xlabel("Distance in Kilometers")

# Merge the legend entries of both axes into a single legend box.
lines, labels = ax.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
ax.legend(lines + lines2, labels + labels2, loc='upper right')

plt.title('Elevation Change and Average Speed for each Kilometer (Cascade Hike analysis)')
fig.savefig("ElevationSpeed-Cascade.png")
plt.show()